#### Predictions: model fit, out of sample this time. 
####
dat1<-dget("C:/XunCao/Research/Portfolio/data/data_wd_nomissing_polity2001")[[3]]
dat2<-dget("C:/XunCao/Research/Portfolio/data/data_wd_nomissing_polity2002")[[3]]
dat3<-dget("C:/XunCao/Research/Portfolio/data/data_wd_nomissing_polity2003")[[3]]
dat4<-dget("C:/XunCao/Research/Portfolio/data/data_wd_nomissing_polity2004")[[3]]
dat5<-dget("C:/XunCao/Research/Portfolio/data/data_wd_nomissing_polity2005")[[3]]

dada<-rbind(dat1, dat2, dat3, dat4, dat5)
cor(dada)

plot(dada[, 1], dada[, 3], xlim=c(-10, 10), xlab="polity", ylab="capital market openness")



yr<-2002

burn<-100 

dd <- c(paste("C:/XunCao/Research/Portfolio/mcmc_net_polity/", as.character(yr), "try 2/", sep=""))
setwd(dd)


out<-read.table("OUT", header=T)
# nss<-dim(out)[1] 



## need to run a basic gravity model here:
## but first, need to plug in the lagged latent positions: 

dat<-dget("data_wd_nomissing_polity2002")

Xs<-na.omit(dat[[3]])
Xs[, 4]<-log(Xs[, 4])
Xs[, 5]<-log(Xs[, 5])
Xr<-Xs

Yd<-dat[[1]][rownames(Xs), rownames(Xs)]
Yd[is.na(Yd)]<-0 # be consistent with Xd
diag(Yd)<-NA
Yd<-log(Yd+1)

Xd<-dat[[2]][rownames(Xs), rownames(Xs), ] 


####
####

OUT=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/OUT",header=T) 
U=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/U")
V=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/V")
 
nss=dim(OUT)[1]
n=dim(U)[1]/nss
k<-3
nburn<-100
aa<-colnames(Yd)

PU<-array(dim=c(n,k,nss))
for(i in 1:nss) { PU[,,i]<-as.matrix(U[ ((i-1)*n+1):(i*n) ,])  }
PU<-PU[,,-(1:nburn)];rownames(PU)=aa;     #drop burn in
PV<-array(dim=c(n,k,nss))
for(i in 1:nss) { PV[,,i]<-as.matrix(V[ ((i-1)*n+1):(i*n) ,])  }
PV<-PV[,,-(1:nburn)]; rownames(PV)=aa; 

OUT=OUT[-(1:nburn),]

 
#find posterior mean of U %*% t(V)
UTV<-matrix(0,n,n)
for(i in 1:dim(PU)[3] ) { 
    UTV<-UTV+PU[,,i]%*%t(PV[,,i]) 
}
UTV<-UTV/dim(PU)[3]  
tmp<-svd(UTV)
 
u2=tmp$u[,1:k];rownames(u2)=aa;
v2=tmp$v[,1:k];rownames(v2)=aa;



Xd[,,2]<-UTV # plug in the UTV from the latent space of the previous year.  

yx<-matrix(NA, nrow=n*(n-1), ncol=3+1+dim(dat[[2]])[3]+2*dim(dat[[3]])[2] )
 

rw<-1
for(i in 1:n){
for(j in (1:n)[-i] ){
yx[rw,]<-c(1, i, j,
           Yd[i,j],
           Xd[i,j, 1], Xd[i,j, 2], Xd[i,j, 3], # geo, lagged UV, and corrGDP
           Xs[i, 1], Xs[i, 2], Xs[i, 3], Xs[i, 4], Xs[i, 5],
           Xr[j, 1], Xr[j, 2], Xr[j, 3], Xr[j, 4], Xr[j, 5])
rw<-rw+1             }}


yx<-data.frame(year=rep(i, n*(n-1)), 
               sender=rownames(Yd)[yx[,2]], 
               receiver=colnames(Yd)[yx[,3]], 
               portf=yx[,4], 
               distance=yx[,5], laglatent=yx[,6], corGDP=yx[,7], 
               polity.s=yx[,8], corrupt.s=yx[,9], CapOpen.s=yx[,10], GDP.s=yx[, 11],GDPPC.s=yx[,12],
               polity.r=yx[,13], corrupt.r=yx[,14], CapOpen.r=yx[,15], GDP.r=yx[, 16],GDPPC.r=yx[,17]
               )

fit<-lm(portf~distance + laglatent + corGDP +  
               polity.s + corrupt.s + CapOpen.s + GDP.s + GDPPC.s + 
               polity.r + corrupt.r + CapOpen.r + GDP.r + GDPPC.r              
              #+as.factor(sender)
              #+as.factor(receiver)
              , data=yx)
summary(fit)


library(xtable)
xtable(summary(fit), digits=c(4,3, 3, 2, 2))



## 
## this is out-sample prediction:
yr<-2002+1

burn<-100 

dd <- c(paste("C:/XunCao/Research/Portfolio/mcmc_net_polity/", as.character(yr), "try 2/", sep=""))
setwd(dd)


out<-read.table("OUT", header=T)
# nss<-dim(out)[1] 



## need to run a basic gravity model here:
## but first, need to plug in the lagged latent positions: 

dat<-dget(paste("data_wd_nomissing_polity", as.character(yr), sep=""))

Xs<-na.omit(dat[[3]])
Xs[, 4]<-log(Xs[, 4])
Xs[, 5]<-log(Xs[, 5])
Xr<-Xs

Yd<-dat[[1]][rownames(Xs), rownames(Xs)]
Yd[is.na(Yd)]<-0 # be consistent with Xd
diag(Yd)<-NA
Yd<-log(Yd+1)

Xd<-dat[[2]][rownames(Xs), rownames(Xs), ] 


####
####

OUT=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/OUT",header=T) 
U=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/U")
V=read.table("C:/XunCao/Research/Portfolio/mcmc_net_polity/2001try 2/V")
 
nss=dim(OUT)[1]
n=dim(U)[1]/nss
k<-3
nburn<-100
aa<-colnames(Yd)

PU<-array(dim=c(n,k,nss))
for(i in 1:nss) { PU[,,i]<-as.matrix(U[ ((i-1)*n+1):(i*n) ,])  }
PU<-PU[,,-(1:nburn)];rownames(PU)=aa;     #drop burn in
PV<-array(dim=c(n,k,nss))
for(i in 1:nss) { PV[,,i]<-as.matrix(V[ ((i-1)*n+1):(i*n) ,])  }
PV<-PV[,,-(1:nburn)]; rownames(PV)=aa; 

OUT=OUT[-(1:nburn),]

 
#find posterior mean of U %*% t(V)
UTV<-matrix(0,n,n)
for(i in 1:dim(PU)[3] ) { 
    UTV<-UTV+PU[,,i]%*%t(PV[,,i]) 
}
UTV<-UTV/dim(PU)[3]  
tmp<-svd(UTV)
 
u2=tmp$u[,1:k];rownames(u2)=aa;
v2=tmp$v[,1:k];rownames(v2)=aa;



Xd[,,2]<-UTV # plug in the UTV from the latent space of the previous year.  

yx<-matrix(NA, nrow=n*(n-1), ncol=3+1+dim(dat[[2]])[3]+2*dim(dat[[3]])[2] )
 

rw<-1
for(i in 1:n){
for(j in (1:n)[-i] ){
yx[rw,]<-c(1, i, j,
           Yd[i,j],
           Xd[i,j, 1], Xd[i,j, 2], Xd[i,j, 3], # geo, lagged UV, and corrGDP
           Xs[i, 1], Xs[i, 2], Xs[i, 3], Xs[i, 4], Xs[i, 5],
           Xr[j, 1], Xr[j, 2], Xr[j, 3], Xr[j, 4], Xr[j, 5])
rw<-rw+1             }}


yx<-data.frame(year=rep(i, n*(n-1)), 
               sender=rownames(Yd)[yx[,2]], 
               receiver=colnames(Yd)[yx[,3]], 
               portf=yx[,4], 
               distance=yx[,5], laglatent=yx[,6], corGDP=yx[,7], 
               polity.s=yx[,8], corrupt.s=yx[,9], CapOpen.s=yx[,10], GDP.s=yx[, 11],GDPPC.s=yx[,12],
               polity.r=yx[,13], corrupt.r=yx[,14], CapOpen.r=yx[,15], GDP.r=yx[, 16],GDPPC.r=yx[,17]
               )


 
pred<-fit$coef[1]+ fit$coef[2]*yx$distance + fit$coef[3]*yx$laglatent + fit$coef[4]*yx$corGDP+fit$coef[5]*yx$polity.s + fit$coef[6]*yx$corrupt.s  +fit$coef[7]*yx$CapOpen.s + fit$coef[8]*yx$GDP.s + fit$coef[9]*yx$GDPPC.s  +fit$coef[10]*yx$polity.r + fit$coef[11]*yx$corrupt.r  +fit$coef[12]*yx$CapOpen.r + fit$coef[13]*yx$GDP.r + fit$coef[14]*yx$GDPPC.r

par(mfrow=c(1,2))     
plot(yx$portf, pred, col="gray", ylim=c(-5, 15), main="OLS prediction", xlab="actual value of portfolio investment", ylab="predicted value of portfolio investment") 
lines(-100:100, -100:100, lwd=1, col="black")
rmse<-(sum((yx$portf-pred)^2, na.rm=T)/(n*(n-1)))^.5

rmse

mean(yx$portf)






## this is going to be a little bit complicated: 
## now the latent model:

yr<-2002

burn<-100 

dd <- c(paste("C:/XunCao/Research/Portfolio/mcmc_net_polity/", as.character(yr), "try 2/", sep=""))
setwd(dd)


out<-read.table("OUT", header=T)

nss<-dim(out)[1] 
out<-out[-c(1:burn),-c(1,2)]
means<-apply(out,2,mean)

wg.names<-as.character(rownames(Yd))
A<-read.table("A",sep=" ",col.names=wg.names)
B<-read.table("B",sep=" ",col.names=wg.names)

A<-A[-c(1:burn),]
B<-B[-c(1:burn),]

a<-apply(A,2,mean) # mean random effect for sender
b<-apply(B,2,mean) # mean random effect for receiver


## now the latent part: actually this time this is the \delta latent part of the model: 
OUT=read.table("OUT",header=T) 
U=read.table("U")
V=read.table("V")
 
nss=dim(OUT)[1]
n=dim(U)[1]/nss
k<-3
nburn<-100
aa<-colnames(Yd)

PU<-array(dim=c(n,k,nss))
for(i in 1:nss) { PU[,,i]<-as.matrix(U[ ((i-1)*n+1):(i*n) ,])  }
PU<-PU[,,-(1:nburn)];rownames(PU)=aa;     
PV<-array(dim=c(n,k,nss))
for(i in 1:nss) { PV[,,i]<-as.matrix(V[ ((i-1)*n+1):(i*n) ,])  }
PV<-PV[,,-(1:nburn)]; rownames(PV)=aa; 


#find posterior mean of Z %*% t(Z)
UTV<-matrix(0,n,n)
for(i in 1:dim(PU)[3] ) { ## Do not change k here
    UTV<-UTV+PU[,,i]%*%t(PV[,,i]) 
}
UTV<-UTV/dim(PU)[3]  




# Xd has the dyadic variables: Xd has been defined in the ols part of the prediction;
bd<-means[2:4]
dyadic.part<-bd[1]*Xd[,,1]+bd[2]*Xd[,,2]+bd[3]*Xd[,,3]


c.p<-matrix(NA,nrow=length(wg.names),ncol=length(wg.names))
for (i in 1:length(wg.names)) 
    for (j in 1:length(wg.names))
    {
    c.p[i,j]<-     a[i] + b[j] + 
                        means[ 6]*Xr[i,1]+
                        means[ 7]*Xr[i,2]+
                        means[ 8]*Xr[i,3]+
                        means[ 9]*Xr[i,4]+
                        means[10]*Xr[i,5]+
                        means[11]*Xr[j,1]+
                        means[12]*Xr[j,2]+ 
                        means[13]*Xr[j,3]+
                        means[14]*Xr[j,4]+ 
                        means[15]*Xr[j,5] 
                        + UTV[i,j]
    }
theta.hat<-dyadic.part + c.p + means[5]  
quantile(theta.hat, c(.025, .5, .975))

pred<-theta.hat
diag(pred)<-NA
quantile(pred, c(.025, .5, .975), na.rm=T)

diag(Yd)<-NA

rmse.lt<-(sum((Yd-pred)^2, na.rm=T)/(n*(n-1)))^.5
rmse.lt





plot((as.vector(Yd)), as.vector(pred),  
     col="grey", ylim=c(-5, 15), main="Latent model prediction", xlab="actual value of portfolio investment", ylab="predicted value of portfolio investment")#,  ylim=c(min(c(as.vector(Yd), as.vector(pred)), na.rm=T), max(c(as.vector(Yd), as.vector(pred)), na.rm=T)),
    # xlim=c(min(c(as.vector(Yd), as.vector(pred)), na.rm=T), max(c(as.vector(Yd), as.vector(pred)), na.rm=T)))#,xaxt="n",las=1)
lines(-100:100, -100:100, lwd=1, col="black")

lines((min(c(as.vector(Yd), as.vector(pred)), na.rm=T)-10):(max(c(as.vector(Yd), as.vector(pred)), na.rm=T)+10), 
      (min(c(as.vector(Yd), as.vector(pred)), na.rm=T)-10):(max(c(as.vector(Yd), as.vector(pred)), na.rm=T)+10), 
      lwd=1, col="black")
